* ------------------------------------------------------------------------
* Session setup: close any log left open by an earlier run, empty memory,
* then configure memory and paging before any data are read.
* ------------------------------------------------------------------------
capture log close
clear
set mem 800m
set more off

* Earlier environment bootstrap, kept for reference:
*do ${fmartorell_home}top_program

* Location of the raw THECB files.
* Earlier setting, kept for reference:
*global rawpath "${receipts}/thecb/thecb_07-04/"
* note: this folder is from ${receipts}/thecb/thecb_07-01/
global rawpath "${d1}thecb/thecb_07-01/"

* Location of the project data directory.
* Earlier setting, kept for reference:
*global data "${fmartorell_home}data"
global data "${d1}data"

* Log file for this run.
* Earlier setting, kept for reference:
*log using ${fmartorell_home}programs/newreport1sr.log, replace
log using ${d1}log/newreport1sr.log, replace
*cd d:\temp
   
/*========================================================================
    REPORT1_04_05 DATA
    UPDATE OF BRIAN'S PROGRAM TO READ IN RAW REPORT 1 DATA FILES    

    This program reads in the raw THECB Report 1 files received 11/1/02 
    for Summer AY 1999 to Spring AY 2002. Then it performs some simple 
    data checks on each file before saving.

    Because some of the data overlap with files that we have already 
    received, the section at the bottom of this file (currently commented 
    out) compares the newly read files with the earlier read-in of the 
    old data. This tests whether the two versions are identical, but not 
    whether either one was read in correctly, since this program is 
    based on the program used to read in the older files.
  ========================================================================*/

#delimit ;

/*------------------------------------------------------------------------
    199_3 to 200_2 files 
  ------------------------------------------------------------------------*/

#delimit ;
capture program drop r1in_00_02;
program define r1in_00_02;
  /*----------------------------------------------------------------------
      r1in_00_02 sem gcyr fileyr

      Reads one fixed-width Report 1 raw file, recodes and labels the
      variables, saves the result as a Stata data set, and then runs the
      checks program on the data left in memory.

      Arguments
        sem     semester code, used in the raw and saved file names and
                passed on to checks
        gcyr    file year, stored in cur_year, used in the saved file
                name and passed on to checks
        fileyr  year suffix used in the raw file name

      Side effects: replaces the data in memory and overwrites the
      saved data set for this year and semester.
    ----------------------------------------------------------------------*/
  args sem gcyr fileyr;

  /* Build the raw file name once so that the message printed below
     always names the file that is actually read. */
  local rawfile "${rawpath}D1SR`sem'0`fileyr'.";

  display _n(2) "-------------------- READ-IN: File Year `gcyr', Semester `sem' --------------------";
  display in white "Reading file: `rawfile'";
  clear;

  infix 
  byte   rectype     1
  double school      2-7
  str9   altpid      8-16
  str1   _sex        17
  str1   _type       18
  int    byr         19-22
  int    bmo         23-24
  int    bda         25-26
  str1   _tutstat    27     
  int    res         28-30     
  double transfer    31-36          
  int    sch_on      37-38          
  int    sch_off     39-40          
  int    docfund     41-42
  byte   _ethnic     43
  byte   semester    44
  int    year        45-48
  int    sch_in      49-50
  byte   flex        51
  byte   remote      52
  double major       53-60
  int    doccode     61-62
  int    ecode       63-64
  int    nofund_c    65-66
  int    nofund_d    67-68
  int    nofund_i    69-70
  int    sch_c       71-72
  int    sch_d       73-74
  int    sch_ug      75-77
  byte   uglimit     78
  str20  unused1     79-98
  str10  unused2     99-108
  str1   unused3     109
  int    sch_dual    110-111
  int    teach_ed    112-113
  str6   unused4     114-119
  str1   update      120
  using "`rawfile'";

  display in white "Drop EOF record:" _continue; drop if school == 445566;

  /* The message and the bdate label both say that a birth day of 0 is
     set to 15, so do it here. Without this step mdy() returns missing
     for every record whose day field is 0. */
  display in white "Dates--incl. setting bday of 0 to 15:" _continue;
  replace bda = 15 if bda == 0;
  generate bdate = mdy(bmo,bda,byr);
  format bdate %dN/D/Y;
  drop byr bmo bda;

  generate cur_year=`gcyr';
  #delimit cr
  
  display in white "Check years"
  tabulate cur_year semester
  tabulate year semester

  * The merge is diagnostic only: the merged-in variable is dropped again
  * right after the match rates are tabulated.
  display in white "--- CIP Code Merge ---"  
  generate str8 textmajor8=string(major,"%08.0f") 
  generate byte cipcode=real(substr(textmajor8,1,2)) 
  sort cipcode
  *merge cipcode using /mnt/data/tsp1/bbucks/BucksAllPrivate/AuxiliaryData/THECB/aggcipcodes2,nokeep
  merge cipcode using ${d1}data/aggcipcodes2,nokeep
  tabulate _merge
  drop cipcode textmajor8 aggcipcode _merge

  display in white "--- Sex ---"
  generate byte sex = 0 if _sex == "F"
  replace sex = 1 if _sex == "M"
  label define sex 0 "female" 1 "male"
  label values sex sex
  drop _sex

  * Renumber the raw ethnicity codes so that they line up with the value
  * labels defined below (raw 5 becomes 1, raw 4 becomes 2, and so on).
  display in white "--- Ethnic ---"
  generate ethnic=.
  quietly replace ethnic=1 if _ethnic==5
  quietly replace ethnic=2 if _ethnic==4
  quietly replace ethnic=3 if _ethnic==2
  quietly replace ethnic=4 if _ethnic==3
  quietly replace ethnic=5 if _ethnic==1
  quietly replace ethnic=6 if _ethnic==6
  quietly replace ethnic=7 if _ethnic==7
  label define ethnic 1 "NAT.AM" 2 "ASIAN" 3 "AFR.AM" 4 "HISPANIC" 5 "ANGLO" 6 "FOREIGN" 7 "UNKNOWN"
  label values ethnic ethnic

  * Numeric codes are kept as they are; the letter code V becomes 10.
  display in white "Recode type variable"
  generate byte type=real(_type)
  replace type=10 if _type=="V"
  tabulate type _type,missing wrap

  * Numeric codes are kept as they are; A becomes 10 and C becomes 11.
  display in white "Generate new tuition status variable"
  generate byte tutstat=real(_tutstat)
  replace tutstat=10 if _tutstat=="A"
  replace tutstat=11 if _tutstat=="C"
  tabulate tutstat _tutstat,missing 
  drop _tutstat _type

  * Count non-empty values across all of the filler fields. A foreach
  * loop is used because the macro has to be re-read on every pass for
  * the running total to accumulate.
  local count=0
  foreach v of varlist unused* {
    quietly count if `v'~=""
    local count=`count'+r(N)
  }
  if (`count'>0) {
    display in red _n(1) "Check--Unused variables are NOT empty: `count' ERRORS"
    foreach v of varlist unused* {
      tabulate `v',wrap
    }
  } 
  else {
    display _n(1) in white "Check--Unused variables are empty: OK"
  } 
  drop unused*

  label var rectype  "Record Code (always 1)"
  label var school   "College"
  label var altpid   "EncryptedSSN"
  label var sex      "Gender"
  label var type     "Classification"
  label var bdate    "Birthdate (day set to 15)"
  label var tutstat  "Tuition Status"
  label var res      "Residence"
  label var transfer "Transfer/1st-time Student"
  label var sch_on   "SCH Load - On Campus"
  label var sch_off  "SCH Load - Off Campus"
  label var ethnic   "Race / Ethnicity"
  label var semester "Current Semester"
  label var year     "Calendar Year"
  label var flex     "Flexible Entry"
  label var remote   "Remote Campus"
  label var major    "Major Area (CIP code)"
  label var ecode    "Tuition Exemption Code"
  label var cur_year "Calendar Year"
  label var update   "Update Code"
  label var docfund   "Doctoral SCH Funded"
  label var sch_in   "Inter-instit'l SCH"
  label var nofund_c "Collegiate SCH not state funded"
  label var nofund_d "Dev. SCH not state funded"
  label var nofund_i "Inter-inst. SCH not state funded"
  label var doccode  "Doctoral Funding Code"
  label var sch_c "Collegiate SCH state funded"
  label var sch_d "Dev. SCH state funded"
  label var sch_ug "Undergrad prgm SCH"  
  label var uglimit "Affected by UG funding limit"
  label var sch_dual "SCH in dual credit courses"
  label var teach_ed "In teacher educ. program" 
  drop _ethnic 
  quietly compress

  * Sort order of the saved file.
  sort altpid school sex sch_on sch_off flex doccode docfund
  *save ${fmartorell_home}data/highered/d1sr`gcyr'_`sem',replace
  save ${d1}data/highered/d1sr`gcyr'_`sem',replace

  * Run the consistency checks on the data just saved.
  checks `sem' `gcyr'
end 



capture program drop checks
program define checks
  * checks sem yr
  *
  * Runs a series of consistency checks on the Report 1 data currently in
  * memory. Each check prints an OK line in white, or an ERRORS line in
  * red followed by a diagnostic tabulation. The data are left unchanged:
  * every helper variable created here is dropped again.
  *
  * Arguments
  *   sem  semester code the file is expected to contain
  *   yr   file year, compared against the year and semester variables
  *        and used to choose which tuition exemption rule applies
  *
  * NOTE(review): most counts run under capture, so a count that fails
  * leaves r(N) holding an earlier value. Confirm whether that is wanted.
  args sem yr

  capture count if (sex~=1 & sex~=0)
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Sex NOT 0 or 1: `count' ERRORS"
    tabulate sex,missing
  }
  else {
    display in white "Check--Sex 0 or 1: OK"
  }

  * Missing type is tolerated here. The OK message now states the same
  * 0-10 range that the condition tests.
  capture count if (type<0 | (type>10 & type~=.))
  local count=r(N)  
  if (`count'>0) {
    display in red _n(1) "Check--Type NOT in 0-10: `count' ERRORS"
    tabulate type,missing
  }
  else {
    display in white "Check--Type in 0-10: OK"
  }

  * On failure, tabulate the variable under test.
  capture count if ~(tutstat==0 | tutstat==1 | tutstat==2 | tutstat==3 | tutstat==5 | tutstat==9)
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Tuition status NOT 0, 1, 2, 3, 5 or 9: `count' ERRORS"
    tabulate tutstat,missing
  }
  else {
    display in white "Check--Tuition status 0, 1, 2, 3, 5 or 9: OK"
  }


  capture count if ((type==7 & docfund==.) | (flex==3 & docfund==.))
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Doc SCH funded missing if type==7 or missing if flex==3: `count' ERRORS"
    tabulate docfund type,missing
    tabulate docfund flex,missing
  }
  else {
    display in white "Check--Doc SCH funded not missing if type==7 & non-missing if flex==3: OK"
  }


  capture count if ~(ethnic>=1 & ethnic<=7)
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Ethnicity NOT in 1-7: `count' ERRORS"
    tabulate ethnic,missing
  }
  else {
    display in white "Check--Ethnicity in 1-7: OK"
  }

  * The file year is the yr argument of this program. The calendar year
  * must equal yr plus 1800, less one when semester is 1.
  capture count if (year-`yr'+(semester==1)~=1800)
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Years and semesters do NOT match: `count' ERRORS"
    display "Gcyr=`yr'"
    tabulate cur_year semester,missing
    tabulate year semester,missing
  }
  else {
    display in white "Check--Years and semesters match: OK"
  }


  capture count if (semester~=`sem')
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Semester NOT same as file semester: `count' ERRORS"
    tabulate semester,missing
  }
  else {
    display in white "Check--Semester same as file semester: OK"
  }


  * Remote codes 1 and 2 are expected only for school 3646.
  capture count if (school~=3646 & (remote==1 | remote==2))
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--School==3646 (TWU) if 1 or 2: `count' ERRORS"
    tabulate school remote
  }
  else {
    display in white "Check--School==3646 (TWU) if 1 or 2: OK"
  }


  generate lengthcip=length(string(major,"%12.0f"))
  capture count if (lengthcip~=7 & lengthcip~=8)
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Major NOT 7-8 digits: `count' ERRORS"
    tabulate lengthcip,missing
  }
  else {
    display in white "Check--Major has 7-8 digits: OK"
  }
  drop lengthcip
  
  capture count if (flex~=1 & flex~=3 & flex~=.)
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Flexible Entry NOT only 1 or 3: `count' ERRORS"
    tabulate flex,missing
  }
  else { 
    display in white "Check--Flexible Entry only 1 or 3: OK" 
  }

   
  capture count if (type~=7 & flex==3)
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Flexible Entry=3 only for Doctorates ==> Type==7 if flex==3 FALSE: `count' ERRORS"
    tabulate type flex,missing
  }
  else {
    display in white "Check--Flexible Entry=3 only for Doctorates ==> Type==7 if flex==3: OK"
  }
  

  capture count if ((docfund==. | docfund==0) & flex==3)
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Flexible Entry=3 only for Doctorates ==> Docfund~=. if flex==3 FALSE: `count' ERRORS"
    tabulate docfund flex,missing
    tabulate doccode flex,missing
  }
  else {
    display in white "Check--Flexible Entry=3 only for Doctorates ==> Docfund~=. if flex==3: OK"
  }

   
  capture count if ((sch_on~=0 | sch_off~=0) & flex==3)
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Flexible Entry=3 => zero-fill sch_on and sch_off FALSE: `count' ERRORS"
    tabulate sch_on if flex==3,missing
    tabulate sch_off if flex==3,missing
  }
  else {
    display in white "Check--Flexible Entry=3 => zero-fill sch_on and sch_off: OK"
  }

  * The set of valid exemption codes depends on the file year.
  if `yr'<96{
    capture count if ~((ecode>=1 & ecode<=9) | (ecode>=20 & ecode<=35) | ecode==.)
    local count=r(N)
    if (`count'>0) {
      display in red _n(1) "Check--Tuition exemption code NOT in 1-9 or 20-35 prior to Fall 1995: `count' ERRORS"
      tabulate ecode,missing
    }
    else { 
      display in white "Check--Tuition exemption code in 1-9 or 20-35 prior to Fall 1995: OK"
    }
  }

  else{
    capture count if ~(ecode==. | ecode==21 | ecode==1)
    local count=r(N)
    if (`count'>0) {
      display in red _n(1) "Check--Tuition exemption code NOT 1 or 21: `count' ERRORS"
      quietly generate tutstat3=(tutstat==3)
      label var tutstat3 "Tuition Status==3"
      tabulate ecode tutstat3,missing
      drop tutstat3
    }
    else {
      display in white "Check--Tuition exemption code is 1 or 21: OK"
    }
  }

  * An exemption code should be present exactly when tutstat is 3.
  count if ((ecode~=. & tutstat~=3) | (tutstat==3 & ecode==.))
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Tuition exemption code NOT only for those with tutstat==3 & non-missing if status==3: `count' ERRORS"
    tabulate ecode tutstat,missing
  }
  else {
    display in white "Check--Tuition exemption code only for those with tutstat==3 & non-missing if status==3: OK"
  }


  * Duplicate check. flex2 is flex with missing turned into 0, and the
  * sort puts the flex2==0 records first within each person and school.
  * Any record after the first in its group that still has flex2==0 is
  * a duplicate that flexible entry does not explain.
  generate flex2=0
  quietly replace flex2=flex if flex~=.
  sort altpid sex school flex2
  by altpid sex school: generate countN=_N
  by altpid sex school: generate countn=_n
  capture count if (countn>1 & countn~=. & flex2==0)
  local count=r(N)
  if (`count'>0) {
    display in red _n(1) "Check--Duplicate records NOT accounted for by flex-entry: `count' ERRORS"
    table countn flex2 countN
  } 
  else {
    display in white "Check--Duplicate records accounted for by flex-entry: OK"
  }
  drop countn countN flex2
end


* Read file years 204 and 205, semesters 1 to 3 in turn. The raw-file
* year suffix is the last digit of the file year, less one for
* semester 1.
foreach gcyr of numlist 204/205 {
  foreach sem of numlist 1/3 {
    local fileyr = `gcyr' - 200 - (`sem' == 1)
    r1in_00_02 `sem' `gcyr' `fileyr'
  }
}

/*
foreach X of numlist 199/201{
   foreach sem in 3 1 2{
      local gcyr=`X'+(`sem'~=3)
      local fileyr=substr(string(`gcyr'-(`sem'==1),"%3.0f"),2,2)
      r1in_00_02 `sem' `gcyr' `fileyr'
   }
}


 cap program drop cfvold
program define cfvold
  args yr sem
  use ${path}brandspankingnew_d1sr`yr'_`sem',clear 
  sort altpid school sex sch_on sch_off flex doccode docfund
  save d:\temp\sorted,replace

  use //raptor/raptorf/users/bbucks/auxiliarydata/report1/new_d1sr`yr'_`sem'
  sort altpid sex school sch_on sch_off flex doccode docfund
  cf _all using d:\temp\sorted,verbose
end

for X in num 1/3: for Y in num 200/201: cfvold Y X
*/

* Close the log opened at the top of this do-file.
log close

